The aim of this article is to critique the given data visualization on Singapore Labour Force Participation Rate among different age-groups over the past 12 years and propose and build an improved visualization for the same using the Tableau Desktop interface.
The following R packages are required for data preparation and data visualization. The plotly package is used to create animated and interactive plots in R.
# Packages needed for data preparation, correlation plots, and heatmaps.
packages <- c(
  "knitr", "corrplot", "ggstatsplot", "plotly", "tidyverse",
  "heatmaply", "seriation", "dendextend", "reshape"
)

# Install any package that is not yet available, then attach it.
for (p in packages) {
  # requireNamespace() only checks availability; attaching is left to the
  # library() call below, which errors loudly if the install failed.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
The Starbucks drinks data set is imported from `data/starbucks_drink.csv`, and its distinct drink categories are viewed as follows:
# Read the Starbucks drinks data and list the distinct drink categories.
starbucks_drink <- readr::read_csv(file = "data/starbucks_drink.csv")
unique(starbucks_drink[["Category"]])
[1] "iced-coffee" "refreshers"
[3] "evolution-fresh" "iced-tea"
[5] "bottled-drinks" "brewed-coffee"
[7] "espresso" "frappuccino-blended-beverages"
[9] "kids-drinks-and-other" "tea"
# Keep only the "kids-drinks-and-other" category and list its drink names.
kids_drinks_and_other <- dplyr::filter(
  starbucks_drink,
  Category == "kids-drinks-and-other"
)
unique(kids_drinks_and_other[["Name"]])
[1] "Cinnamon Dolce Crème" "Hot Chocolate"
[3] "Pumpkin Spice Crème" "Salted Caramel Hot Chocolate"
[5] "Steamed Apple Juice" "Vanilla Crème"
# Distinct milk options recorded for the kids-drinks subset.
unique(kids_drinks_and_other[["Milk"]])
[1] "Almond" "Coconut" "Nonfat milk"
[4] "Whole Milk" "2% Milk" "Soy (United States)"
[7] NA
# Distinct whipped-cream options recorded for the kids-drinks subset.
unique(kids_drinks_and_other[["Whipped Cream"]])
[1] "No Whipped Cream" "Whipped Cream" NA
# summary(kids_drinks_and_other)

# Correlogram of columns 3 to 15 of the kids-drinks subset.
ggstatsplot::ggcorrmat(
  data = kids_drinks_and_other,
  cor.vars = 3:15,
  title = "Correlogram for Starbucks dataset",
  subtitle = "16 pairs are not significantly correlated at p < 0.05",
  colors = c("#CC6600", "white", "#000066"),
  ggtheme = theme_minimal(),
  # Options forwarded to the underlying ggcorrplot call.
  ggcorrplot.args = list(
    outline.color = "black",
    hc.order = TRUE,
    tl.cex = 14
  ),
  # NOTE(review): outline.color is also set inside ggcorrplot.args above;
  # this top-level copy looks redundant -- confirm before removing.
  outline.color = "black"
)
spec_tbl_df [262 x 18] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Category : chr [1:262] "kids-drinks-and-other" "kids-drinks-and-other" "kids-drinks-and-other" "kids-drinks-and-other" ...
$ Name : chr [1:262] "Cinnamon Dolce Crème" "Cinnamon Dolce Crème" "Cinnamon Dolce Crème" "Cinnamon Dolce Crème" ...
$ Portion(fl oz) : num [1:262] 12 12 12 12 12 12 12 12 12 12 ...
$ Calories : num [1:262] 140 210 170 230 170 230 240 310 210 280 ...
$ Calories from fat : num [1:262] 45 100 50 110 0 60 80 140 50 100 ...
$ Total Fat(g) : num [1:262] 5 11 6 12 0 6 9 15 6 12 ...
$ Saturated fat(g) : num [1:262] 0 4 5 9 0 4 5 9 3.5 7 ...
$ Trans fat(g) : num [1:262] 0 0 0 0 0 0 0 0 0 0 ...
$ Cholesterol(mg) : num [1:262] 0 20 0 20 5 25 30 45 25 40 ...
$ Sodium(mg) : num [1:262] 120 125 130 135 120 125 120 125 135 140 ...
$ Total Carbohydrate(g): num [1:262] 25 27 28 30 32 34 32 34 32 34 ...
$ Dietary Fiber(g) : num [1:262] 1 1 0 1 0 0 0 0 0 0 ...
$ Sugars(g) : num [1:262] 22 25 26 28 31 33 31 33 31 33 ...
$ Protein(g) : num [1:262] 2 2 1 1 10 10 9 9 9 10 ...
$ Caffeine(mg) : chr [1:262] "0" "0" "0" "0" ...
$ Size : chr [1:262] "Tall" "Tall" "Tall" "Tall" ...
$ Milk : chr [1:262] "Almond" "Almond" "Coconut" "Coconut" ...
$ Whipped Cream : chr [1:262] "No Whipped Cream" "Whipped Cream" "No Whipped Cream" "Whipped Cream" ...
- attr(*, "spec")=
.. cols(
.. Category = col_character(),
.. Name = col_character(),
.. `Portion(fl oz)` = col_double(),
.. Calories = col_double(),
.. `Calories from fat` = col_double(),
.. `Total Fat(g)` = col_double(),
.. `Saturated fat(g)` = col_double(),
.. `Trans fat(g)` = col_double(),
.. `Cholesterol(mg)` = col_double(),
.. `Sodium(mg)` = col_double(),
.. `Total Carbohydrate(g)` = col_double(),
.. `Dietary Fiber(g)` = col_double(),
.. `Sugars(g)` = col_double(),
.. `Protein(g)` = col_double(),
.. `Caffeine(mg)` = col_character(),
.. Size = col_character(),
.. Milk = col_character(),
.. `Whipped Cream` = col_character()
.. )
- attr(*, "problems")=<externalptr>
| Portion(fl oz) | Calories | Calories from fat | Total Fat(g) | Saturated fat(g) | Trans fat(g) | |
|---|---|---|---|---|---|---|
| Min. : 8.00 | Min. : 90.0 | Min. : 0.00 | Min. : 0.000 | Min. : 0.000 | Min. :0.00000 | |
| 1st Qu.: 8.00 | 1st Qu.:192.5 | 1st Qu.: 50.00 | 1st Qu.: 6.000 | 1st Qu.: 2.000 | 1st Qu.:0.00000 | |
| Median :12.00 | Median :270.0 | Median : 80.00 | Median : 9.000 | Median : 4.750 | Median :0.00000 | |
| Mean :13.51 | Mean :283.8 | Mean : 83.24 | Mean : 9.177 | Mean : 5.149 | Mean :0.03053 | |
| 3rd Qu.:16.00 | 3rd Qu.:350.0 | 3rd Qu.:110.00 | 3rd Qu.:12.750 | 3rd Qu.: 7.000 | 3rd Qu.:0.00000 | |
| Max. :24.00 | Max. :650.0 | Max. :220.00 | Max. :24.000 | Max. :15.000 | Max. :0.50000 |
| Cholesterol(mg) | Sodium(mg) | Total Carbohydrate(g) | Dietary Fiber(g) | Sugars(g) | Protein(g) | Caffeine(mg) | |
|---|---|---|---|---|---|---|---|
| Min. : 0.00 | Min. : 10.0 | Min. :14.00 | Min. :0.00 | Min. :12.00 | Min. : 0.000 | Min. : 0.00 | |
| 1st Qu.: 0.00 | 1st Qu.:115.0 | 1st Qu.:28.00 | 1st Qu.:0.00 | 1st Qu.:25.00 | 1st Qu.: 4.000 | 1st Qu.: 0.00 | |
| Median :20.00 | Median :160.0 | Median :39.50 | Median :1.00 | Median :37.00 | Median : 7.000 | Median : 0.00 | |
| Mean :21.49 | Mean :172.4 | Mean :42.47 | Mean :1.79 | Mean :38.67 | Mean : 8.237 | Mean : 10.25 | |
| 3rd Qu.:30.00 | 3rd Qu.:210.0 | 3rd Qu.:53.00 | 3rd Qu.:3.00 | 3rd Qu.:48.75 | 3rd Qu.:12.000 | 3rd Qu.: 20.00 | |
| Max. :75.00 | Max. :460.0 | Max. :99.00 | Max. :7.00 | Max. :85.00 | Max. :19.000 | Max. :225.00 |
# Build one label per row from drink name, milk, whipped cream, and size.
# paste() keeps its default separator, so each piece is joined by a space.
kids_drinks_and_other[["Kids_drinks"]] <- with(
  kids_drinks_and_other,
  paste(Name, "_", Milk, "_", `Whipped Cream`, "-", Size)
)

# Count fully duplicated rows.
sum(duplicated(kids_drinks_and_other))
[1] 0
# Convert the drinks table to a numeric matrix for clustering.
# NOTE(review): `kids` is not defined in the visible part of this document --
# presumably it is derived from `kids_drinks_and_other`; confirm.
kidsdrinks_matrix <- data.matrix(kids)

# Euclidean distances between rows, leaving out the first column.
wh_d <- dist(kidsdrinks_matrix[, -1], method = "euclidean")

# Compare hierarchical clustering methods on the distance matrix and print
# the third element of the result.
dendextend::dend_expend(wh_d)[[3]]
dist_methods hclust_methods optim
1 unknown ward.D 0.6015018
2 unknown ward.D2 0.6644386
3 unknown single 0.6790721
4 unknown complete 0.7399215
5 unknown average 0.8008273
6 unknown mcquitty 0.6972242
7 unknown median 0.4415643
8 unknown centroid 0.7779656